/* ********************************************************************************************************************I********* */
/* ******************* Constructs a dataset containing vote alpha and EDGAR views at the fund-family level  *************I*********/
/* ********************************************************************************************************************I********* */

**** Step 1: Generate a file that contains meeting dates and proxy filings, forms;
data forms; set drop.edgar_indices;
  accession = substr(accession, 13);
  slash = index(accession, "/");
  accession = substr(accession, slash+1);
  dot = find(accession, ".");
  accession = substr(accession, 1, dot-1);
  if substr(form, 1, 3) = 'DEF' and index(form, '14') > 1 then def = 1; 
  if substr(form, 1, 3) = 'PRE' and index(form, '14') > 1 then prelim = 1;
  datefile = left(compress(datefile,''));
  if def = 1 or prelim = 1 then proxy = 1;
  if proxy = 1; 

  if length(datefile) = 8 then do;
    year = substr(datefile, 1, 4)*1;
	month = substr(datefile, 5, 2)*1;
	day = substr(datefile, 7, 2)*1; end;
  if length(datefile) = 10 then do;
    year = substr(datefile, 1, 4)*1;
	month = substr(datefile, 6, 2)*1;
	day = substr(datefile, 9, 2)*1; end;
  fdate = mdy(month, day, year);
  format fdate date9.;
  drop slash dot month day year;

proc sql;
  create table forms as
  select a.*, b.meetingdate, b.permno, b.gvkey, intck('day', a.fdate, b.meetingdate) as interval
  from forms as a left join temp.cst_matched as b
  on input(a.cika, best.) = input(b.cik, best.) and 1 <= intck('day', a.fdate, b.meetingdate) <= 182;
quit; * Company proxy statements are typically released between 40 and 45 days prior to the meeting;
* proxy statements filed within 6 months of annual meetings;

proc sort; by cika accession interval; 

data forms; set forms; by cika accession interval; 
  if first.accession;
  cik = input(cika, best.) ;
  keep cik accession permno meetingdate interval proxy def prelim fdate;

proc sort data = forms nodupkey out = cik_meetingdates (keep=cik meetingdate permno); by cik meetingdate; where cik ne . and meetingdate ne .;
run;


**** Step 2: Link ISS to IP ranges;
proc sql;
  create table arin_iss_link as
  select a.institutionID, a.institutionName, a.num_votes, a.st_date, a.end_dt, a.orgid, a.orgname, b.netrange, b.regdate, b.updated
  from drop.link_arin_iss as a left join arin.net_ipv4 as b
  on a.orgid = b.orgid;
quit;

data arin_iss_link2; set arin_iss_link;
  dash = index(netrange, "-");
  netrange_st = left(substr(netrange, 1, dash-2));
  netrange_end = left(substr(netrange, dash+2));
  first = input(substr(netrange_st, 1, index(netrange_st, ".")-1), best.);
  temp = substr(netrange_st, index(netrange_st, ".")+1);
  second = input(substr(temp, 1, index(temp, ".")-1), best.);
  temp = substr(temp, index(temp, ".")+1);
  third = input(substr(temp, 1, index(temp, ".")-1), best.);
  fourth = input(substr(temp, index(temp, ".")+1), best.);
  netrange_st2 = first*1000000000+second*1000000+third*1000+fourth; 
  first = input(substr(netrange_end, 1, index(netrange_end, ".")-1), best.);
  temp = substr(netrange_end, index(netrange_end, ".")+1);
  second = input(substr(temp, 1, index(temp, ".")-1), best.);
  temp = substr(temp, index(temp, ".")+1);
  third = input(substr(temp, 1, index(temp, ".")-1), best.);
  fourth = input(substr(temp, index(temp, ".")+1), best.);
  netrange_end2 = first*1000000000+second*1000000+third*1000+fourth; 
  keep institutionID institutionName num_votes st_date end_dt OrgID Orgname netrange_st netrange_end netrange_st2 netrange_end2; run;

proc sort data = arin_iss_link2 nodupkey; by institutionID num_votes st_date end_dt OrgID OrgName netrange_st netrange_end; 
run;


%macro byyear;
  %do y = 2003 %to 2017; 

**** Step 3: translate the octets and match with ISS institutions;
proc sql;
  create table proxy_log_ins_&y as
  select a.*, substr(ip, length(ip)-2, 3) as octets, b.proxy, b.fdate, b.def, b.prelim, b.meetingdate, b.interval
  from drop.proxy_log_ins_&y (keep=cik accession date ip crawler) as a left join forms as b
  on a.accession = b.accession;
quit;

proc sql;
  create table proxy_log_ins_&y as
  select a.*, b.num
  from proxy_log_ins_&y as a left join drop.octets as b
  on a.octets = b.code;
quit;

proc sort nodupkey; by accession date ip; 
* collapse same day views of the same proxy statement into one: if the same ip address views a given proxy statement multiple times on a given day, it is counted as one view;

data proxy_log_ins_&y; set proxy_log_ins_&y;
  ip2 = compress(tranwrd(ip, substr(ip,length(ip)-2), num),'');
  first = input(substr(ip2, 1, index(ip2, ".")-1), best.);
  temp = substr(ip2, index(ip2, ".")+1);
  second = input(substr(temp, 1, index(temp, ".")-1), best.);
  temp = substr(temp, index(temp, ".")+1);
  third = input(substr(temp, 1, index(temp, ".")-1), best.);
  fourth = input(substr(temp, index(temp, ".")+1), best.);
  ip_num = first*1000000000+second*1000000+third*1000+fourth; 
  drop first temp second third fourth num;

proc sql;
  create table proxy_log_ins_&y as
  select *
  from arin_iss_link2 as a left join proxy_log_ins_&y as b
  on a.netrange_st2 <= b.ip_num <= a.netrange_end2;
quit;

proc sort data = proxy_log_ins_&y nodupkey; by institutionID OrgId OrgName date ip_num accession; 

proc means data = proxy_log_ins_&y noprint; by institutionID OrgId OrgName;
  var ip_num; 
  output out = iss_a_&y (drop=_type_ _freq_) n = num_views_all;

*** views before shareholder meetings;
proc sql;
  create table proxy_log_ins_&y as
  select a.*, b.meetingdate
  from proxy_log_ins_&y as a, cik_meetingdates as b
  where a.cik = b.cik and b.meetingdate-75 <= a.date <= b.meetingdate and a.proxy = 1;
quit;* we focus on a window from t-75 to t, where t is the date of the annual meeting;

proc sort nodupkey; by institutionID OrgId OrgName date ip_num accession; 

proc means data = proxy_log_ins_&y noprint; by institutionID OrgId OrgName;
  var ip_num; 
  output out = iss_b_&y (drop=_type_ _freq_) n = num_views_proxy;

proc sort nodupkey data = proxy_log_ins_&y; by institutionID OrgId OrgName ip_num; 
  where proxy = 1;

proc means data = proxy_log_ins_&y noprint; by institutionID OrgId OrgName;
  var ip_num; 
  output out = iss_c_&y (drop=_type_ _freq_) n = num_ips_proxy;

data iss_&y; merge iss_a_&y iss_b_&y iss_c_&y; by institutionID OrgId OrgName;
  year = &y;

proc append base=temp.edgar_views data=iss_&y force;
run;

%end;
%mend;

%byyear;

data edgar_views; set temp.edgar_views;
  if num_views_all ne 0;
  if num_views_proxy = . then num_views_proxy = 0;
  if num_ips_proxy = . then num_ips_proxy = 0;
run;

proc sql;
  create table proxy_views_fund as
  select a.*, b.institutionName, b.fundid, b.fundname, b.crsp_fundno, b.wficn, b.crsp_cl_grp
  from edgar_views as a, drop.iss_crspmf as b
  where a.institutionID = b.institutionID; 
quit;

proc sort nodupkey; by wficn crsp_cl_grp year orgID; 
run;

proc means data = proxy_views_fund noprint; by wficn crsp_cl_grp year;
  var num_views_all num_views_proxy num_ips_proxy;
  output out = proxy_views_fund (drop=_type_ _freq_) sum =  num_views_all num_views_proxy num_ips_proxy;

data proxy_views_fund2; set proxy_views_fund;
  year = year+1;

proc sort; by wficn crsp_cl_grp year; 
run;

proc sort data = temp.informed_fund_level; by wficn crsp_cl_grp year;

data proxy_views; merge temp.informed_fund_level (in=a) proxy_views_fund2; by wficn crsp_cl_grp year;
  if a;
  log_num_views_proxy = log(1+num_views_proxy);
  log_num_ips_proxy = log(1+num_ips_proxy);

proc sort data = proxy_views; by mgmt_no2 year descending mtna; run;

data proxy_views2; set proxy_views; by mgmt_no2 year descending mtna;
  if first.year; 
  keep mgmt_no2 year num_views_proxy fsize log_num_views_proxy num_ips_proxy log_num_ips_proxy;
run;


**** Compute vote alpha at the family level;
proc sort data = temp.informed_fund_level; by mgmt_no2 year;

proc means data = temp.informed_fund_level noprint; by mgmt_no2 year;
  var vote_alpha vote_alpha_lag voteagainstmgmt_all iss_conform_all in_minority_non_con; weight mtna;
  output out = mgmt_co_level1a (drop=_type_ _freq_) mean = vote_alpha vote_alpha_lag voteagainstmgmt_all iss_conform_all in_minority_non_con;

proc means data = temp.informed_fund_level noprint; by mgmt_no2 year;
  var num_pr1yr;
  output out = mgmt_co_level1b (drop=_type_ _freq_) sum = num_pr1yr_f;
run;

**** Compute number of shareholder meetings at the family level;
proc sort data = temp.iss_votes_ret (keep=wficn crsp_cl_grp year permno meetingdate) nodupkey out = num_meetings; by wficn crsp_cl_grp year permno meetingdate;

proc sql;
  create table num_meetings as
  select a.*, b.mgmt_no2
  from num_meetings as a left join temp.informed_fund_level as b
  on a.wficn = b.wficn and a.crsp_cl_grp = b.crsp_cl_grp and a.year = b.year;
quit;

proc sort; by mgmt_no2 year permno meetingdate; where mgmt_no2 ne '';

proc means noprint; by mgmt_no2 year;
  var permno;
  output out = num_meetings2 (drop=_type_ _freq_) n = num_meetings;
run;

data mgmt_co_level2; merge proxy_views2 mgmt_co_level1a mgmt_co_level1b num_meetings2; by mgmt_no2 year;
  log_num_pr1yr_f = log(num_pr1yr_f);
  log_num_meetings = log(num_meetings);
  num_views_proxy_scaled = num_views_proxy/num_meetings;
  num_ips_proxy_scaled = num_ips_proxy/num_meetings;
  f_tna = exp(fsize);
run;

proc sort; by year;

proc rank data = mgmt_co_level2 out = mgmt_co_level2 groups = 5; by year;
  var num_ips_proxy_scaled num_views_proxy_scaled;
  ranks num_ips_proxy_ind num_views_proxy_ind;

data temp.mgmt_co_level; set mgmt_co_level2;
  if num_ips_proxy_ind in (0,1,2,3) then num_ips_proxy_ind = 0; else if num_ips_proxy_ind = 4 then num_ips_proxy_ind = 1;
  if num_views_proxy_ind in (0,1,2,3) then num_views_proxy_ind = 0; else if num_views_proxy_ind = 4 then num_views_proxy_ind = 1;
run;

proc export data= temp.mgmt_co_level
            outfile= "D:\Dropbox\InformedVoting\mgmt_co_level.dta" 
            dbms=stata replace;
run;
